*---------------------------------------FORMAT_AA_PARCEL_ADDRESSES.DO-----------------------------------------
*This script prepares an address table of AA parcels for use with GIS software to map addresses to neighborhoods.
*This script differs from format_AA_sale_addresses.do only in that it preserves additional address information from 
*the later tax roll files to improve address matching with poorly formatted address information from MLS listings.

*Sebastien Bradley 5/12/10

*Start from an empty session
clear all
*Best-effort change to the author's project folder. capture lets the script carry on from the current working
*directory when this path does not exist
capture cd "C:/Users/Sebastien/Documents/Research/PropertyTaxes/Ann_Arbor"
*NOTE(review): set memory is presumably unnecessary on recent Stata releases - confirm before removing
set memory 64m
*Close any log left open by an earlier run (no error if none is open)
capture log close
set more off

*Load the tax roll. The load is deliberately not wrapped in capture: a missing or unreadable file should stop the
*script here with Stata's own error message rather than surface later as a failed rename on an empty dataset
use AA_tax_roll.dta, clear

rename propaddresscombined address
*Keep only rows with property class 401 (any other value, including missing, is dropped)
drop if propertyclass~=401
*stable keeps rows that share a pid in their file order. A plain sort orders tied rows randomly
sort pid, stable


*Attach city, state and zip code to each property address. City and state are constants, while the zip code is
*borrowed from the owner record whenever the owner address appears to refer to the property itself:
#delimit ;
gen city = "ANN ARBOR";
gen state = "MI";
replace ownercity = upper(ownercity);

*Shorthand for the expressions shared by the matching rules below. These are text macros, so each one is
*evaluated against the data as it stands at the point of use;
local ownzip   "substr(ownerzipcode,1,5)";
local same_w1  "word(address,1)==word(ownerstreetaddress,1)";
local same_w2  "word(address,2)==word(ownerstreetaddress,2)";
local same_w3  "word(address,3)==word(ownerstreetaddress,3)";
local aa_diff  `"ownercity=="ANN ARBOR" & address~=ownerstreetaddress"';
local no_city  "missing(ownercity) & missing(ownerstate) & missing(zip) & ~missing(ownerzipcode)";

*Exact match between owner address and property address;
gen zip = `ownzip' if address==ownerstreetaddress;

*Ann Arbor owners whose address string differs from the property address.
*Owner addresses often lack the street type, so compare street number and street name only;
replace zip = `ownzip' if `aa_diff' & `same_w1' & `same_w2';
*Visual inspection showed that matching on the street number alone is highly accurate;
replace zip = `ownzip' if `aa_diff' & `same_w1';
*Known exception - blank out the zip assigned to this address;
replace zip = "" if address=="5 ROCKLAND CT";
*Many owners sit at a neighboring street number on the same street (often one owner holding several
*properties). Words 2 and 3 are compared on the assumption that a zip code boundary does not split
*properties on the same street;
replace zip = `ownzip' if `aa_diff' & `same_w2' & `same_w3';

*Same three rules for records with no owner city or state, applied only where zip is still empty and an
*owner zip code is available. First street number and name;
replace zip = `ownzip' if `no_city' & `same_w1' & `same_w2';
*Then street number only;
replace zip = `ownzip' if `no_city' & `same_w1';
*Then neighboring owners on the same street;
replace zip = `ownzip' if `no_city' & `same_w2' & `same_w3';
	
*Miscellaneous clean-up. Vacant parcels are removed and street types are rewritten to the abbreviations
*used by the GIS address locator;
drop if regexm(upper(address), "VACANT")==1;                /*Vacant properties*/
replace address = subinword(address,"CRS","CRES",1);        /*GIS address locator uses different abbreviations*/
replace address = subinword(address,"CRESCENT","CRES",1);
/*Only the trailing street type LA becomes LN. subinword() with a count of 1 rewrites the FIRST word LA,
  which is part of the street name whenever the name itself contains LA (12 LA SALLE LA would turn into
  12 LN SALLE LA). Anchoring the pattern at the end of the string touches the last word only. The if
  condition guarantees that the last word is exactly LA, so a name that merely ends in those letters is safe*/
replace address = regexr(address,"LA[ ]*$","LN") if word(address,-1)=="LA";
replace address = subinword(address,"LANE","LN",1);
replace address = subinword(address,"BLV","BLVD",1);
replace address = subinword(address,"AV","AVE",1);
replace address = subinword(address,"AVENUE","AVE",1);
/*Literal substring substitutions, applied wherever the text occurs in the address*/
replace address = subinstr(address,"EMBER WAY","EMERALD AVE",1);
replace address = subinstr(address,"BEACON HILL ST","BEACON HILL",1);


/*Convert zip from string to numeric*/
destring zip, replace;
#delimit cr


*Reduce dataset to one row per parcel (pid).
*Rows are ordered by zip within pid. When zip is numeric, missing values sort last, so a row with a zip is kept
*ahead of a row without one.
*stable keeps tied rows in their existing order. A plain sort orders ties randomly, so the surviving row, and
*with it the owner fields written to the output file, could differ from one run to the next.
sort pid zip, stable
*Drop every row whose pid repeats the row above it. On the first row pid[_n-1] is missing, so that row is
*dropped as well if its own pid is missing.
drop if pid==pid[_n-1]


*Keep the address fields plus the owner fields and fix the column order of the output
keep pid pid_long address city state zip ownerstreetaddress ownercity ownerstate ownerzipcode
order pid address city state zip ownerstreetaddress ownercity ownerstate ownerzipcode pid_long
*Write a text file with variable names in the first row, overwriting any existing file
outsheet using AA_parcel_addresses.txt, names replace

/**/
